Using Ibis and Plotly - From Raw Data to Choropleth Maps

Background: Goal is to visualize which states and state counties have experienced the most gain or loss in population from 2010 to 2020

What data do we have? We have census data in its original form that we obtained from US census web site and saved it as a table in a local Postgres database

Code
import ibis
import pandas as pd
from ibis import _
ibis.options.interactive = True
pd.options.display.max_rows=1000
Code
con = ibis.postgres.connect(
    host='localhost',
    user='postgres',
    password='your_password',
    port=5432,
    database='analysis',
)
Code
con.list_tables()
['us_counties_pop_est_2010_2020_raw',
 'regions',
 'divisions',
 'npopchg_by_county_2010_2020',
 'npopchg_by_state_2010_2020',
 'us_states']

Obtaining the raw data - US population totals by State and County

The raw or original data we want is saved as a Postgres table called us_counties_pop_est_2010_2020_raw:

Code
us_counties_pop = con.table('us_counties_pop_est_2010_2020_raw')
Code
us_counties_pop
┏━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ sumlev  region  division  state   county  stname   ctyname          census2010pop  estimatesbase2010  popestimate2010  popestimate2011  popestimate2012  popestimate2013  popestimate2014  popestimate2015  popestimate2016  popestimate2017  popestimate2018  popestimate2019  popestimate2020  npopchg_2010  npopchg_2011  npopchg_2012  npopchg_2013  npopchg_2014  npopchg_2015  npopchg_2016  npopchg_2017  npopchg_2018  npopchg_2019  npopchg_2020  births2010  births2011  births2012  births2013  births2014  births2015  births2016  births2017  births2018  births2019  births2020  deaths2010  deaths2011  deaths2012  deaths2013  deaths2014  deaths2015  deaths2016  deaths2017  deaths2018  deaths2019  deaths2020  naturalinc2010  naturalinc2011  naturalinc2012  naturalinc2013  naturalinc2014  naturalinc2015  naturalinc2016  naturalinc2017  naturalinc2018  naturalinc2019  naturalinc2020  internationalmig2010  internationalmig2011  internationalmig2012  internationalmig2013  internationalmig2014  internationalmig2015  internationalmig2016  internationalmig2017  internationalmig2018  internationalmig2019  internationalmig2020  domesticmig2010  domesticmig2011  domesticmig2012  domesticmig2013  domesticmig2014  domesticmig2015  domesticmig2016  domesticmig2017  domesticmig2018  domesticmig2019  domesticmig2020  netmig2010  netmig2011  netmig2012  netmig2013  netmig2014  netmig2015  netmig2016  netmig2017  netmig2018  netmig2019  netmig2020  residual2010  residual2011  residual2012  residual2013  residual2014  residual2015  residual2016  residual2017  residual2018  residual2019  residual2020  gqestimatesbase2010  gqestimates2010  gqestimates2011  gqestimates2012  gqestimates2013  gqestimates2014  gqestimates2015  gqestimates2016  gqestimates2017  gqestimates2018  gqestimates2019  gqestimates2020  rbirth2011       rbirth2012       rbirth2013       rbirth2014       rbirth2015       rbirth2016       rbirth2017       rbirth2018       rbirth2019       rbirth2020       rdeath2011       rdeath2012       rdeath2013       rdeath2014       rdeath2015       rdeath2016       rdeath2017       rdeath2018       rdeath2019       rdeath2020       rnaturalinc2011  rnaturalinc2012  rnaturalinc2013  rnaturalinc2014  rnaturalinc2015  rnaturalinc2016  rnaturalinc2017  rnaturalinc2018  rnaturalinc2019  rnaturalinc2020  rinternationalmig2011  rinternationalmig2012  rinternationalmig2013  rinternationalmig2014  rinternationalmig2015  rinternationalmig2016  rinternationalmig2017  rinternationalmig2018  rinternationalmig2019  rinternationalmig2020  rdomesticmig2011  rdomesticmig2012  rdomesticmig2013  rdomesticmig2014  rdomesticmig2015  rdomesticmig2016  rdomesticmig2017  rdomesticmig2018  rdomesticmig2019  rdomesticmig2020  rnetmig2011      rnetmig2012      rnetmig2013      rnetmig2014      rnetmig2015      rnetmig2016      rnetmig2017      rnetmig2018      rnetmig2019      rnetmig2020     ┃
┡━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ stringint16int16stringstringstringstringint32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32int32decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10)decimal(15, 10) │
├────────┼────────┼──────────┼────────┼────────┼─────────┼─────────────────┼───────────────┼───────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼──────────────────────┼──────────────────────┼──────────────────────┼──────────────────────┼──────────────────────┼──────────────────────┼──────────────────────┼──────────────────────┼──────────────────────┼──────────────────────┼──────────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼──────────────┼─────────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼───────────────────────┼───────────────────────┼───────────────────────┼───────────────────────┼───────────────────────┼───────────────────────┼───────────────────────┼───────────────────────┼───────────────────────┼───────────────────────┼──────────────────┼──────────────────┼──────────────────┼──────────────────┼──────────────────┼──────────────────┼──────────────────┼──────────────────┼──────────────────┼──────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┼─────────────────┤
│ 40    361     0     AlabamaAlabama        477973647801184785514479964248166324831586484373748548034866824487798948916284907965492153253961412816990149541215111066120211116513639163371356714202596995907457943589145965359695586445866557251567391107748837483635085349720518785171153196545605403858354312510862107117090919477757984544841053213-1615105351626282549441595095630135463940229320781225-1903-1142294-996-1553-21482287566410828131152278325961687788316335424153583396041312115193-7711176-206-251-116-116-703-1111618511624211518411581011694611905211998011864111711111660011677811676712.456552611012.286255571012.011129931012.178198082012.301439186012.280866155012.035941582012.009682672011.684362810011.544639568010.190131491010.058573622010.541428479010.277693055010.698105076010.638342738010.917808274011.169322196011.028621291011.87324234402.26642112032.22768194831.46970145161.90050502711.60333411011.64252341711.11813330850.84036047680.6557415191-0.32860277601.07708210491.30653515071.13886315590.85971290051.05067360651.29628507660.72777179000.80658228460.46797861910.4228090206-0.3970723060-0.02370980700.4755282271-0.2058845990-0.3202543890-0.44190134000.46937791421.15951321332.20988769642.66849870340.68000979851.28282534381.61439138290.65382830110.73041921770.85438373641.19714970421.96609549792.67786631553.0913077241 │
│ 50    361     1     AlabamaAutauga County 54571545825476155229549705474754922549035530255448555335576956145179468-259-223175-1939914685236376151639615570637652675667651592606157514560584572585547574563552582-612555-14656712893884024281601919237-32-14-8147327-329-226102-10726658-3208360175343-329-207121-8427355-119435210015-2-11-2-2-2-22045545545545545545545545545545545545511.619238113011.161625786010.390367947011.616774111011.873435010012.249897918012.045146727011.731737865010.637724389010.82974426809.346304209510.163431610010.645569966010.431388998010.65331208709.926954312410.365688488010.14588082609.918959228010.40084350502.27293390310.9981941760-0.25520202001.18538511341.22012292282.32294360511.67945823931.58585703860.71876516150.42890076310.29093553960.00000000000.34634559820.34649718700.41884816750.1270359784-0.05417607200.0360422054-0.2515678070-0.14296692105.9459950905-5.9710160710-4.11968974701.8601427933-1.94855451904.82736717931.0474040632-0.05406330803.73757883966.43351144636.2369306301-5.9710160710-3.77334414902.2066399803-1.52970635104.95440315780.9932279910-0.01802110303.48601103306.2905445253 │
│ 50    361     3     AlabamaBaldwin County 182265182263183121186579190203194978199306203101207787212737218071223565229287858345836244775432837954686495053345494572251421862092216222152261228623132297232223175341829188319021988209920212103232523862543-20357209260227162265210-28-64-226511922732401411652111041186360780289630564175386134374193462252345511591883130883329441540023602440447265352557459784713861009931171410-16-3023072307226322422296233123372276219221712268226811.825804707011.104564443011.225891204011.235556097011.237379071011.127119799011.000561205010.663683126010.515447110010.23292378109.89450906149.99516962069.875876536010.084101815010.43222409209.837230583510.001807269010.793671427010.805278555011.23104237101.93129564511.10939482251.35001466841.15145428170.80515497991.28988921560.9987539356-0.1299883010-0.2898314450-0.99811859101.03868001081.44911381121.24616738620.71522050100.82006525731.02704386600.49462099670.54780784020.28530282860.264987236415.666756830016.221581710021.678120156019.584867760017.082207814020.409454645021.982098525024.298527418024.957204576026.136574422016.705436841017.670695522022.924287543020.300088261017.902273072021.436498511022.476719521024.846335258025.242507404026.4015616580 │
│ 50    361     5     AlabamaBarbour County 27457274542732527344271722694626768263002582825169248872465724589-12919-172-226-178-468-472-659-282-230-6870335301284265274284276273248250131324286293308332279297335322334-611115-9-43-585-21-62-74-840-5-11-8513139866-6914-176-211-141-429-492-650-228-16110-699-187-219-136-416-479-641-220-155161-10216230-1031933193338133913388335331952977281928132778277712.255574457011.042629687010.49558372409.867073761010.326373709010.896255371010.824166127010.907783283010.011303084010.153108882011.853152609010.492332526010.828190251011.468146107012.512248436010.704419890011.647743985013.385008790012.998546746013.56455346600.40242184780.5502971605-0.3326065260-1.6010723460-2.18587472700.1918354819-0.8235778580-2.4772255070-2.9872436620-3.4114445840-0.1829190220-0.4035512510-0.29565024600.18617120300.48993743880.49877225290.35296193890.31964200100.24220894560.24367461320.5121732609-6.4568200160-7.7977752320-5.2500279260-16.1679354800-18.8766114200-25.4916955900-9.1097970270-6.49927337300.40612435530.3292542391-6.8603712670-8.0934254780-5.0638567230-15.6779980400-18.3778391700-25.1387336500-8.7901550260-6.25706442800.6497989684 │
│ 50    361     7     AlabamaBibb County    22915229042285822736226572251022541225532259022532223002231322136-46-122-79-147311237-58-23213-177442652452582532522992672432382493227723627625026624325131423226612-129-183-145616-716-1701219201414117876-59-124-105-1511815-30-80-171-1-166-59-112-86-1313229-19-73-1636-16012-22-4-30-121022242224222422282224224722552204215321472122212111.624336536010.794615910011.424269932011.231715167011.176653213013.246793523011.834581800010.840471092010.669535786011.203851605012.150721586010.398078999012.221312020011.098532774011.797578392010.765788716011.125393378014.007851535010.400555892011.9687732010-0.52638505100.3965369110-0.79704208800.1331823933-0.62092517902.48100480690.7091884225-3.16738044300.2689798938-0.76492159600.52638505070.83713347870.88560232030.62151783530.62092517850.48734022990.31026993480.35688793720.31380987600.2699723278-5.4393121900-4.6262639610-6.68629751800.79909435970.6652769770-1.3291097180-3.5459421120-7.6284796570-0.0448299820-7.4692344030-4.9129271400-3.7891304830-5.80069519801.42061219511.2862021555-0.8417694880-3.2356721780-7.27159172000.2689798938-7.1992620760 │
│ 50    361     9     AlabamaBlount County  57322573225737257561575855763057536575355748757801577705784057879501892445-94-1-48314-3170391817417146476207156756816756736651325695875825916346517217186586974917212765298124-40-4315-32-2-884943153204-41928-100-65-158-90-102357105868720-92-16-115-75-70357145469-6-3-11-4-8-7-2-3-21248948948948948948948948948948948948912.894468951012.401646605011.231176496010.767066669012.427110219011.736885118011.813892166011.681131080011.642591471011.49335891309.901420827810.195751481010.102851191010.263445809011.019283747011.319573647012.507806537012.425262393011.383098348012.04642280002.99304812372.20589512451.12832530490.50362086031.40782647240.4173114708-0.6939143710-0.74413131300.2594931234-0.5530638880-0.13921154100.13895402360.85058369140.74674817220.26070860600.55641529450.00000000000.0692215175-0.06919816600.01728324650.4872403922-1.7369252950-1.1283253050-2.7438653770-1.5642516360-1.77357375106.19318576090.17305379381.00337341061.17526076100.3480288516-1.5979712710-0.2777416140-1.9971172050-1.3035430300-1.21715845706.19318576090.24227531130.93417524441.1925440075 │
│ 50    361     11    AlabamaBullock County 1091410913108761068010610105571066810404103971018110165101449976-37-196-70-53111-264-7-216-16-21-168371721211311231241471291121101145313311711911613313114312095116-16394127-916-14-815-2119818618101415-24-255-81-8394-260-31-213-23-37-170-23-236-73-65100-259-23-203-9-36-16521-10440110-116901690169017791719175716611729166216631705170415.958433847011.366838891012.377757831011.590106007011.769172361014.133935868012.537661580011.009535044010.832635777011.332007952012.339951754010.991075622011.243917419010.930506478012.623386484012.595548291013.898338031011.79593040409.355458171311.53081510903.61848209320.37576326911.13384041200.6595995289-0.85421412301.5383875775-1.3606764510-0.78639536001.4771776060-0.19880715701.76285025050.75152653831.70076061790.56537102470.09491268030.76919378880.97191175041.37619188050.09847850710.4970178926-23.6593059900-7.6092062000-7.84239618308.8574793875-24.6772968900-2.9806259310-20.7017202800-2.2608866610-3.6437047610-16.8986083500-21.8964557400-6.8576796620-6.14163556509.4228504122-24.5823842100-2.2114321430-19.7298085300-0.8846947800-3.5452262540-16.4015904600 │
│ 50    361     13    AlabamaButler County  20947209402093320867206722035920332201682004019911196751950119504-7-66-195-313-27-164-128-129-236-1743662732422402522382362392202052006626427426228727625326927227226609-32-22-35-38-17-30-52-67-66026822284423211916-4-77-170-304-11-153-154-121-205-12452-4-75-164-29611-125-110-98-184-10568-3015-3-1-1-10-2133333333333333333333333333333333333313.062200957011.651700811011.698471887012.386031309011.753086420011.738957421011.964656704011.115040671010.465591178010.255095501012.631578947013.192421580012.770831810014.106313435013.629629630012.584560287013.466496458013.742232102013.886052685013.63927701600.4306220096-1.5407207680-1.0723599230-1.7202821260-1.8765432100-0.8456028650-1.5018397540-2.6271914310-3.4204615070-3.38418151500.09569377990.28888514410.38994906291.08132019371.38271604942.18861918031.15141047781.06098115500.96998162140.8204076400-3.6842105260-8.1850790820-14.8180643900-0.5406600970-7.5555555560-7.6601671310-6.0574203400-10.3571969900-6.33040637102.6663248301-3.5885167460-7.8961939380-14.42811533000.5406600968-6.1728395060-5.4715479510-4.9060098620-9.2962158340-5.36042475003.4867324702 │
│ 50    361     15    AlabamaCalhoun County 118572118533118420117767117227116528115991115550115036114746114298114070113469-113-653-540-699-537-441-514-290-448-228-60131613841356130713161386136513381287127112303111325135914111395145514751393164514861553559-3-104-79-69-110-55-358-215-32333981688477983150-110-113-752-607-660-533-441-500-260-136-15-289-110-713-526-592-449-364-402-229-86-16-279-81-11-3-9-8-2-6-43129332933288229582813279927742762274328303286328511.719527324011.540720189011.182648499011.319505073011.971961769011.839400484011.645820821011.238015403011.131156729010.811333442011.219923196011.566252755012.072469038011.999019435012.567968524012.793491365012.124535429014.364052322013.014082533013.65040718300.4996041272-0.0255325670-0.8898205390-0.6795143620-0.5960067550-0.9540908810-0.4787146080-3.1260369190-1.8829258040-2.83907374100.33024679600.68937930330.58180573680.72252160040.66510898720.85000823990.26982096070.4365973350-0.00875779400.0878970199-6.3678356560-5.1660893470-5.6469380330-4.5845715830-3.8092605630-4.3367767340-2.2630145090-1.1875447510-0.1313669170-2.5402238740-6.0375888600-4.4767100440-5.0651322970-3.8620499830-3.1441515760-3.4867684940-1.9931935490-0.7509474160-0.1401247110-2.4523268540 │
│ 50    361     17    AlabamaChambers County34215341543410534016340883412533964339913374233716335703324432865-49-897237-16127-249-26-146-326-37981400392407425422387380344362334804424764544554454444854154495191-42-84-47-30-23-57-105-71-87-185628342932211818191613-54-7412257-15831-20965-93-255-208-48-4615686-12652-19183-74-239-195-2-10-2-5-2-1-4-10145845845845845845845845845845845845811.743808811011.511805474011.933209212012.483661091012.419983813011.427221591011.266269382010.225009660010.836052324010.104524346012.976908736013.978620933013.311245657013.364860697013.096902362013.110300740014.379317501012.335404096013.440296944015.7013417240-1.2330999250-2.4668154590-1.3780364450-0.8811996060-0.6769185490-1.6830791490-3.1130481190-2.1103944360-2.6042446190-5.59681737700.82206661680.99847292380.85027780630.93994624680.61805606650.53149867860.53366539180.56475344050.47894153920.3932898698-2.17260463003.58275578531.6712356882-4.64098459400.9123684791-6.17129021301.9271250259-2.7643194720-7.6331307810-6.2926379160-1.35053801304.58122870902.5215134945-3.70103834701.5304245457-5.63979153402.4607904177-2.1995660320-7.1541892420-5.8993480460 │
│  │
└────────┴────────┴──────────┴────────┴────────┴─────────┴─────────────────┴───────────────┴───────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴──────────────────────┴──────────────────────┴──────────────────────┴──────────────────────┴──────────────────────┴──────────────────────┴──────────────────────┴──────────────────────┴──────────────────────┴──────────────────────┴──────────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴──────────────┴─────────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴───────────────────────┴───────────────────────┴───────────────────────┴───────────────────────┴───────────────────────┴───────────────────────┴───────────────────────┴───────────────────────┴───────────────────────┴───────────────────────┴──────────────────┴──────────────────┴──────────────────┴──────────────────┴──────────────────┴──────────────────┴──────────────────┴──────────────────┴──────────────────┴──────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┘

Let’s look at how many rows this table has

Code
us_counties_pop.count()

3194

and the number of columns

Code
len(us_counties_pop.columns)
180

That’s a lot of columns and we don’t need most of them. For our purposes, we won’t include most of them and we also need to create new columns. We’ll be using ibis to create more usable data sets from this raw data.

Preparing Usable data from the Raw Data

The raw data has the FIPS codes without leading zeros. We need them to have leading zeros because we’ll see later on, the geo json data that we need to merge with, has the FIPS codes with leading zeros. I could be wrong, but ibis doesn’t have a built-in way to add leading zeros like pandas does with its zfill() method. But, it is relatively easy to implement this once I discovered what ibis’ literal() does and that I can leverage the length() function also. I may submit a feature request to the ibis Github repo.

The transformations that are needed:

  • New column: The state FIPS code is a 2-character code that should have a leading zero when the state code is a single digit code
  • New column: The county FIPS codes is a 3-character code that should have 2 leading zeros when the county code is a single digit code, 1 leading zero when it is a 2 digit code
  • New column: We need the state and county FIPS code combined or concatenated so that we can merge with the geo json data later
  • New column: The population total differnces from 2010 to 2020
  • New column: The population total differences represented as a percentage
  • Transformation: Return only the columns that we will actuall use or need
  • Transformation: Rename/relabel column names to be more self explanatory

Based on the requirements above and leveraging ibis’ literal(), length(), mutate(), select(), and relabel() functions, I was able to satisfy those requirements

Ibis API allows you to “chain” your transformations, which is really nice for readability. You can also comment out one or more lines at a time to iteratively see those changes.

Preparing county level data

Code
npopchg_by_county_2010_2020 = (
    us_counties_pop
    .mutate(state_fips=ibis.literal("0") * (2 - us_counties_pop.state.length()) + us_counties_pop.state)
    .mutate(county_fips=ibis.literal("0") * (3 - us_counties_pop.county.length()) + us_counties_pop.county)
    .mutate(id=_.state_fips + _.county_fips)
    .mutate(npopchg_2010_2020=_.popestimate2020-_.popestimate2010)
    .mutate(npopchg_2010_2020_perc_diff=(_.npopchg_2010_2020 / _.popestimate2010) * 100)
    .select(_.id, _.state_fips, _.county_fips, _.stname, _.ctyname, _.popestimate2010, _.popestimate2020, _.npopchg_2010_2020, _.npopchg_2010_2020_perc_diff)
    .relabel(
        {
            'stname': 'state_name',
            'ctyname': 'county_name'
        }
    )
)

Let’s take a peek at what the transformed data looks like now

Code
npopchg_by_county_2010_2020
┏━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ id      state_fips  county_fips  state_name  county_name      popestimate2010  popestimate2020  npopchg_2010_2020  npopchg_2010_2020_perc_diff ┃
┡━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ stringstringstringstringstringint32int32int64float64                     │
├────────┼────────────┼─────────────┼────────────┼─────────────────┼─────────────────┼─────────────────┼───────────────────┼─────────────────────────────┤
│ 01000 01        000        Alabama   Alabama        478551449215321360182.842286 │
│ 01001 01        001        Alabama   Autauga County 547615614513842.527346 │
│ 01003 01        003        Alabama   Baldwin County 1831212292874616625.210653 │
│ 01005 01        005        Alabama   Barbour County 2732524589-2736-10.012809 │
│ 01007 01        007        Alabama   Bibb County    2285822136-722-3.158632 │
│ 01009 01        009        Alabama   Blount County  57372578795070.883706 │
│ 01011 01        011        Alabama   Bullock County 108769976-900-8.275101 │
│ 01013 01        013        Alabama   Butler County  2093319504-1429-6.826542 │
│ 01015 01        015        Alabama   Calhoun County 118420113469-4951-4.180882 │
│ 01017 01        017        Alabama   Chambers County3410532865-1240-3.635831 │
│  │
└────────┴────────────┴─────────────┴────────────┴─────────────────┴─────────────────┴─────────────────┴───────────────────┴─────────────────────────────┘

Let’s make sure the number of rows match our original raw data set’s number of rows

Code
npopchg_by_county_2010_2020.count()

3194

The data looks like it is in a final form that we can use, let’s go ahead and save it as a Postgres table

Code
con.create_table(name='npopchg_by_county_2010_2020', obj=npopchg_by_county_2010_2020, database='analysis', overwrite=True)
┏━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ id      state_fips  county_fips  state_name  county_name      popestimate2010  popestimate2020  npopchg_2010_2020  npopchg_2010_2020_perc_diff ┃
┡━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ stringstringstringstringstringint32int32int64float64                     │
├────────┼────────────┼─────────────┼────────────┼─────────────────┼─────────────────┼─────────────────┼───────────────────┼─────────────────────────────┤
│ 01000 01        000        Alabama   Alabama        478551449215321360182.842286 │
│ 01001 01        001        Alabama   Autauga County 547615614513842.527346 │
│ 01003 01        003        Alabama   Baldwin County 1831212292874616625.210653 │
│ 01005 01        005        Alabama   Barbour County 2732524589-2736-10.012809 │
│ 01007 01        007        Alabama   Bibb County    2285822136-722-3.158632 │
│ 01009 01        009        Alabama   Blount County  57372578795070.883706 │
│ 01011 01        011        Alabama   Bullock County 108769976-900-8.275101 │
│ 01013 01        013        Alabama   Butler County  2093319504-1429-6.826542 │
│ 01015 01        015        Alabama   Calhoun County 118420113469-4951-4.180882 │
│ 01017 01        017        Alabama   Chambers County3410532865-1240-3.635831 │
│  │
└────────┴────────────┴─────────────┴────────────┴─────────────────┴─────────────────┴─────────────────┴───────────────────┴─────────────────────────────┘

So now we’re done creating our data set at the county level. Now, let’s prepare our data at the state level.

Preparing state level data

Let’s recall what our npopchg_by_county_2010_2020 data looks like that we created earlier:

Code
npopchg_by_county_2010_2020
┏━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ id      state_fips  county_fips  state_name  county_name      popestimate2010  popestimate2020  npopchg_2010_2020  npopchg_2010_2020_perc_diff ┃
┡━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ stringstringstringstringstringint32int32int64float64                     │
├────────┼────────────┼─────────────┼────────────┼─────────────────┼─────────────────┼─────────────────┼───────────────────┼─────────────────────────────┤
│ 01000 01        000        Alabama   Alabama        478551449215321360182.842286 │
│ 01001 01        001        Alabama   Autauga County 547615614513842.527346 │
│ 01003 01        003        Alabama   Baldwin County 1831212292874616625.210653 │
│ 01005 01        005        Alabama   Barbour County 2732524589-2736-10.012809 │
│ 01007 01        007        Alabama   Bibb County    2285822136-722-3.158632 │
│ 01009 01        009        Alabama   Blount County  57372578795070.883706 │
│ 01011 01        011        Alabama   Bullock County 108769976-900-8.275101 │
│ 01013 01        013        Alabama   Butler County  2093319504-1429-6.826542 │
│ 01015 01        015        Alabama   Calhoun County 118420113469-4951-4.180882 │
│ 01017 01        017        Alabama   Chambers County3410532865-1240-3.635831 │
│  │
└────────┴────────────┴─────────────┴────────────┴─────────────────┴─────────────────┴─────────────────┴───────────────────┴─────────────────────────────┘

The data above has data broken up at the state and county level (“too granular”), but what we need instead, is to “roll the data up” or combine the population totals at the state level. To do this requires what is known as aggregating the data. What we need to accomplish this is to tell ibis to calculate the sum of the 2010 population totals for each state (the sums are aggregated or grouped by state) and do the same for the 2020 population totals.

Now, let’s make 2 data sets from this data set: one which has aggregate sum for year 2010 and the other has aggregate sum for year 2020. Then we will merge them together.

Code
npopchg_2010 = npopchg_by_county_2010_2020.group_by(npopchg_by_county_2010_2020.state_name).aggregate(sum=npopchg_by_county_2010_2020.popestimate2010.sum()).relabel({"sum": "popestimate2010_sum"})
npopchg_2020 = npopchg_by_county_2010_2020.group_by(npopchg_by_county_2010_2020.state_name).aggregate(sum=npopchg_by_county_2010_2020.popestimate2020.sum()).relabel({"sum": "popestimate2020_sum"})

Here’s what our 2010 data looks like:

Code
npopchg_2010.head()
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃ state_name      popestimate2010_sum ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ stringint64               │
├────────────────┼─────────────────────┤
│ Oklahoma      7520028 │
│ North Carolina19149172 │
│ Colorado      10095078 │
│ Mississippi   5941230 │
│ Florida       37692286 │
└────────────────┴─────────────────────┘

Here’s what our 2020 data looks like:

Code
npopchg_2020.head()
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃ state_name      popestimate2020_sum ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ stringint64               │
├────────────────┼─────────────────────┤
│ Oklahoma      7961566 │
│ North Carolina21201646 │
│ Colorado      11615438 │
│ Mississippi   5933572 │
│ Florida       43466624 │
└────────────────┴─────────────────────┘

Now we’ll merge them together based on state name using ibis’ inner_join() method and create additional columns using its mutate() method

Code
npopchg_by_state_2010_2020 = (
    npopchg_2010
    .inner_join(npopchg_2020, npopchg_2010.state_name == npopchg_2020.state_name)
    .mutate(popestimate_diff=_.popestimate2020_sum - _.popestimate2010_sum)
    .mutate(popestimate_perc_diff=(_.popestimate_diff / _.popestimate2010_sum) * 100)
    .order_by(_.state_name)
)
Code
npopchg_by_state_2010_2020
┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
┃ state_name            popestimate2010_sum  popestimate2020_sum  popestimate_diff  popestimate_perc_diff ┃
┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
│ stringint64int64int64float64               │
├──────────────────────┼─────────────────────┼─────────────────────┼──────────────────┼───────────────────────┤
│ Alabama             957102898430642720362.842286 │
│ Alaska              14279641462316343522.405663 │
│ Arizona             1281468414842802202811815.826516 │
│ Arkansas            584399660610442170483.714034 │
│ California          746391007873615640970565.489155 │
│ Colorado            1009507811615438152036015.060409 │
│ Connecticut         71583467114012-44334-0.619333 │
│ Delaware            179929419736181743249.688467 │
│ District of Columbia1210564142563221506817.765934 │
│ Florida             3769228643466624577433815.319681 │
│  │
└──────────────────────┴─────────────────────┴─────────────────────┴──────────────────┴───────────────────────┘

As can be seen from above, we’ve added 2 new columns: popestimate_diff contains the population gain or loss from 2010 to 2020 and another column representing this as a percentage (popestimate_perc_diff).

Let’s see how many rows we have

Code
npopchg_by_state_2010_2020.count()

51

If you’re wondering why we have 51 states and not 50 states, it is because “District of Columbia” was also included as a state.

Looks like our data has what we need with regards to sums, differences, and percent differences at the state level for 2010 and 2020. So we’ll save this as a Postgres table.

Code
con.create_table(name='npopchg_by_state_2010_2020', obj=npopchg_by_state_2010_2020, database='analysis', overwrite=True)
┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
┃ state_name            popestimate2010_sum  popestimate2020_sum  popestimate_diff  popestimate_perc_diff ┃
┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
│ stringint64int64int64float64               │
├──────────────────────┼─────────────────────┼─────────────────────┼──────────────────┼───────────────────────┤
│ Alabama             957102898430642720362.842286 │
│ Alaska              14279641462316343522.405663 │
│ Arizona             1281468414842802202811815.826516 │
│ Arkansas            584399660610442170483.714034 │
│ California          746391007873615640970565.489155 │
│ Colorado            1009507811615438152036015.060409 │
│ Connecticut         71583467114012-44334-0.619333 │
│ Delaware            179929419736181743249.688467 │
│ District of Columbia1210564142563221506817.765934 │
│ Florida             3769228643466624577433815.319681 │
│  │
└──────────────────────┴─────────────────────┴─────────────────────┴──────────────────┴───────────────────────┘

When using Plotly and to plot at the state level, we need to obtain 2-character state codes. Our table we just created doesn’t have it, but luckily we have a table that maps state name to its corresponding 2-character state code in our Postgres database.

Code
us_states = con.table('us_states')
Code
us_states
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━┓
┃ state_name   state_code ┃
┡━━━━━━━━━━━━━╇━━━━━━━━━━━━┩
│ stringstring     │
├─────────────┼────────────┤
│ Alabama    AL         │
│ Alaska     AK         │
│ Arizona    AZ         │
│ Arkansas   AR         │
│ California CA         │
│ Colorado   CO         │
│ ConnecticutCT         │
│ Delaware   DE         │
│ Florida    FL         │
│ Georgia    GA         │
│           │
└─────────────┴────────────┘
Code
us_states.count()

51

Let’s confirm it has District of Columbia and its corresponding state code

Code
us_states.filter(us_states.state_name.contains("District"))
┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓
┃ state_name            state_code ┃
┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩
│ stringstring     │
├──────────────────────┼────────────┤
│ District of ColumbiaDC         │
└──────────────────────┴────────────┘

Now, we’ll merge our table we created earlier to this state name to state code table so that our data will have the 2-character state code included

Code
npopchg_by_state_2010_2020 = npopchg_by_state_2010_2020.inner_join(us_states, npopchg_by_state_2010_2020.state_name == us_states.state_name)
Code
npopchg_by_state_2010_2020
┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓
┃ state_name            popestimate2010_sum  popestimate2020_sum  popestimate_diff  popestimate_perc_diff  state_code ┃
┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩
│ stringint64int64int64float64string     │
├──────────────────────┼─────────────────────┼─────────────────────┼──────────────────┼───────────────────────┼────────────┤
│ Alabama             957102898430642720362.842286AL         │
│ Alaska              14279641462316343522.405663AK         │
│ Arizona             1281468414842802202811815.826516AZ         │
│ Arkansas            584399660610442170483.714034AR         │
│ California          746391007873615640970565.489155CA         │
│ Colorado            1009507811615438152036015.060409CO         │
│ Connecticut         71583467114012-44334-0.619333CT         │
│ Delaware            179929419736181743249.688467DE         │
│ District of Columbia1210564142563221506817.765934DC         │
│ Florida             3769228643466624577433815.319681FL         │
│           │
└──────────────────────┴─────────────────────┴─────────────────────┴──────────────────┴───────────────────────┴────────────┘

Population Difference by State

Now that we have our population difference between 2020 and 2010 by 2-character state code, we can finally start making a Plotly plot.

Using Plotly with ibis table expressions isn’t supported, but we can easily convert our ibis table expression to a pandas dataframe using to_pandas() method

Code
df_npopchg_by_state_2010_2020 = npopchg_by_state_2010_2020.to_pandas()
Code
type(df_npopchg_by_state_2010_2020)
pandas.core.frame.DataFrame

Let’s create a Plotly choropleth map of the population difference from 2010 to 2020

Code
import plotly.express as px

fig = px.choropleth(
    df_npopchg_by_state_2010_2020,
    locations='state_code', 
    locationmode="USA-states", 
    scope="usa",
    color='popestimate_diff',
    color_continuous_scale="YlGnBu",
    title='2010 to 2020 Population Change',
    labels={'state_code': 'State', 'popestimate_diff': 'Population Diff.'},
    width=1200,
    height=600,
)
fig.show()

But as an experienced data analyst, you do wonder if using straight quantity of the population change really tell us much. Does it really show an apples-to-apples comparison? How about we use the percent change in population instead so that there is an even playing field or baseline to compare one state to another?

Code
import plotly.express as px

fig = px.choropleth(
    df_npopchg_by_state_2010_2020,
    locations='state_code', 
    locationmode="USA-states", 
    scope="usa",
    color='popestimate_perc_diff',
    color_continuous_scale="YlGnBu",
    title='2010 to 2020 % Population Change',
    labels={'state_code': 'State', 'popestimate_perc_diff': '2010 to 2020 % Diff'},
    width=1200,
    height=600,
)
fig.show()

Now, let’s look at population difference by County

Per Plotly’s documenation, we need to obtain geo json data

Code
from urllib.request import urlopen
import json
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

Let’s refresh our memory what our county level data looks like:

Code
npopchg_by_county_2010_2020
┏━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ id      state_fips  county_fips  state_name  county_name      popestimate2010  popestimate2020  npopchg_2010_2020  npopchg_2010_2020_perc_diff ┃
┡━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ stringstringstringstringstringint32int32int64float64                     │
├────────┼────────────┼─────────────┼────────────┼─────────────────┼─────────────────┼─────────────────┼───────────────────┼─────────────────────────────┤
│ 01000 01        000        Alabama   Alabama        478551449215321360182.842286 │
│ 01001 01        001        Alabama   Autauga County 547615614513842.527346 │
│ 01003 01        003        Alabama   Baldwin County 1831212292874616625.210653 │
│ 01005 01        005        Alabama   Barbour County 2732524589-2736-10.012809 │
│ 01007 01        007        Alabama   Bibb County    2285822136-722-3.158632 │
│ 01009 01        009        Alabama   Blount County  57372578795070.883706 │
│ 01011 01        011        Alabama   Bullock County 108769976-900-8.275101 │
│ 01013 01        013        Alabama   Butler County  2093319504-1429-6.826542 │
│ 01015 01        015        Alabama   Calhoun County 118420113469-4951-4.180882 │
│ 01017 01        017        Alabama   Chambers County3410532865-1240-3.635831 │
│  │
└────────┴────────────┴─────────────┴────────────┴─────────────────┴─────────────────┴─────────────────┴───────────────────┴─────────────────────────────┘

Looks good, so now convert this ibis table expresssion to pandas dataframe:

Code
df_npopchg_by_county_2010_2020 = npopchg_by_county_2010_2020.to_pandas()

First, let’s plot straight qty of the population difference from 2010 to 2020

Code
import plotly.express as px

fig = px.choropleth(
    df_npopchg_by_county_2010_2020,
    geojson=counties,
    locations='id',
    color='npopchg_2010_2020',
    color_continuous_scale="YlGnBu",
    range_color=(500, 100000),
    scope="usa",
    hover_data=['state_name','county_name'],
    labels={'state_name': 'State', 'county_name': 'County', 'npopchg_2010_2020': '2010 to 2020 diff'},
    width=1200,
    height=600,
    title='2010 to 2020 Population Change',
)
fig.show()

Now, let’s plot using percent change instead

Code
import plotly.express as px

fig = px.choropleth(
    df_npopchg_by_county_2010_2020,
    geojson=counties,
    locations='id',
    color='npopchg_2010_2020_perc_diff',
    color_continuous_scale="YlGnBu",
    range_color=(-20, 80),
    scope="usa",
    hover_data=['state_name','county_name'],
    labels={'state_name': 'State', 'county_name': 'County', 'npopchg_2010_2020_perc_diff': '2010 to 2020 % diff'},
    width=1200,
    height=600,
    title='2010 to 2020 % Population Change',
)
fig.show()

counties